*--------------------------------------------------------------------*
*
*                   Rothstein Replication & Extension                 *
*                   Code: Matt kraft                                  *
*                   Date: 4/17/10                                     *
*---------------------------------------------------------------------*
clear
set more off
clear results
set mem 500m

cd "E:\gov2001\California Data\District by Year Spreadsheets"

*------------------------------------------------------------------------------*
*             Merging all Years of district outcome data from Ed-data website  *
* -----------------------------------------------------------------------------*

* NOTE: for some reasons the files 9293 - 0708 lost the variable name "Number of Teachers" 
* I had to go back to the csv files and fix this by inserting the name right before Number of Full Time Equivalents


insheet using "0708.csv",clear
gen year=2008
save "08.dta", replace
insheet using "0607.csv", clear
gen year=2007
save "07.dta", replace
insheet using "0506.csv",clear
gen year=2006
save "06.dta", replace
insheet using "0405.csv", clear
gen year=2005
save "05.dta", replace
insheet using "0304.csv",clear
gen year=2004
save "04.dta", replace
insheet using "0203.csv", clear
gen year=2003
save "03.dta", replace
insheet using "0102.csv",clear
gen year=2002
save "02.dta", replace
insheet using "0001.csv", clear
gen year=2001
save "01.dta", replace
insheet using "9900.csv",clear
gen year=2000
save "00.dta", replace
insheet using "9899.csv", clear
gen year=1999
save "99.dta", replace
insheet using "9798.csv",clear
gen year=1998
save "98.dta", replace
insheet using "9697.csv", clear
gen year=1997
save "97.dta", replace
insheet using "9596.csv",clear
gen year=1996
save "96.dta", replace
insheet using "9495.csv", clear
gen year=1995
save "95.dta", replace
insheet using "9394.csv",clear
gen year=1994
save "94.dta", replace
insheet using "9293.csv", clear
gen year=1993
save "93.dta", replace

* starts with 93 in memory and appends other files
forvalues n=4/9 {
	append using 9`n'.dta
	}

forvalues n=0/8 {
	append using 0`n'.dta
	}
** cleaning merging variables
rename districtcode district_code
destring district_code , gen(temp) ignore("") force
split district_code, gen(temp2)
compress
sort district_code year

cd "E:\gov2001"

save "Source\dist_outcome.dta", replace

** then I copy and past the file into excel and use text to columns to chop off leading space in district and county vars
insheet using "Source\district_outcome.csv" , clear
save "Source\dist_outcome.dta", replace


*-------------------------------------------------------------------------------------*
*
*       Preparing Bond and financial data in the same fashion as Rothstein            *
*
*-------------------------------------------------------------------------------------*


*-----------------------------------------------------------------------------------*
*                       Preparing finance data from Rothstein file
*                     Original source of the data : http://www.nces.ed.gov/ccd.     *
*-----------------------------------------------------------------------------------*

cd "E:\gov2001"

use "Source\leafinance_panel_ca.dta", clear

keep if stfips==6
 replace cofips=cofips-6000 if cofips>6000 & cofips<7000

drop if leaid=="" | leaid=="N"

*Drop districts that arent elementary/secondary/unified
 drop if level>3

*Check that county is constant within LEA over time
 sort leaid year
 by leaid: gen firstco=cofips[1]
 count if cofips~=firstco

*Assign each LEA to the modal county
 by leaid: egen modeco=mode(cofips)
 *One district has 4 of each -- break the tie for the earlier county, as this dist is later absorbed
  replace modeco=19 if leaid=="0600023"
 gen ismode=(cofips==modeco)
 by leaid: egen frmode=mean(ismode)
 replace cofips=modeco
 rename frmode cofips_recode
 drop modeco ismode



gen newleaid=leaid

replace newleaid="0636805" if leaid=="0600023"
replace newleaid="0600076" if leaid=="0600024"
replace newleaid="0600027" if leaid=="0600027"
replace newleaid="0600113" if leaid=="0600030"
replace newleaid="0600116" if leaid=="0600041"
replace newleaid="0636120" if leaid=="0600043"
replace newleaid="0691092" if leaid=="0600119"
replace newleaid="0600153" if leaid=="0601910"
replace newleaid="0600153" if leaid=="0601930"
replace newleaid="0600020" if leaid=="0602490"
replace newleaid="0600017" if leaid=="0602880"
replace newleaid="0636805" if leaid=="0603450"
replace newleaid="0614950" if leaid=="0604050"
replace newleaid="0691137" if leaid=="0604920"
replace newleaid="0614950" if leaid=="0606240"
replace newleaid="0600049" if leaid=="0607110"
replace newleaid="0691134" if leaid=="0607320"
replace newleaid="0614950" if leaid=="0607380"
replace newleaid="0600067" if leaid=="0607620"
replace newleaid="0600067" if leaid=="0607650"
*replace newleaid="0600023" if leaid=="0608280"
replace newleaid="0600049" if leaid=="0609180"
replace newleaid="0629540" if leaid=="0609666"
replace newleaid="0600039" if leaid=="0610920"
replace newleaid="0600047" if leaid=="0610990"
replace newleaid="0600065" if leaid=="0611160"
replace newleaid="0600065" if leaid=="0611190"
replace newleaid="0600033" if leaid=="0611370"
replace newleaid="0600033" if leaid=="0611410"
replace newleaid="0600026" if leaid=="0612240"
replace newleaid="0600027" if leaid=="0612540"
replace newleaid="0600027" if leaid=="0612570"
replace newleaid="0602250" if leaid=="0612660"
replace newleaid="0600052" if leaid=="0613040"
replace newleaid="0600052" if leaid=="0613060"
replace newleaid="0600035" if leaid=="0613590"
replace newleaid="0600037" if leaid=="0613660"
replace newleaid="0600037" if leaid=="0613680"
replace newleaid="0609030" if leaid=="0614640"
replace newleaid="0600046" if leaid=="0615540"
replace newleaid="0600046" if leaid=="0615570"
replace newleaid="0600051" if leaid=="0616140"
replace newleaid="0600051" if leaid=="0616170"
replace newleaid="0600018" if leaid=="0616710"
replace newleaid="0600044" if leaid=="0616770"
replace newleaid="0600044" if leaid=="0616800"
replace newleaid="0691134" if leaid=="0616890"
replace newleaid="0600011" if leaid=="0617010"
replace newleaid="0600060" if leaid=="0617910"
replace newleaid="0600060" if leaid=="0617940"
replace newleaid="0600018" if leaid=="0618090"
replace newleaid="0600011" if leaid=="0622530"
replace newleaid="0600014" if leaid=="0622780"
replace newleaid="0600015" if leaid=="0623070"
replace newleaid="0600025" if leaid=="0623580"
replace newleaid="0600038" if leaid=="0624120"
replace newleaid="0600022" if leaid=="0624510"
replace newleaid="0600019" if leaid=="0626520"
replace newleaid="0600029" if leaid=="0626550"
replace newleaid="0600036" if leaid=="0626700"
replace newleaid="0600116" if leaid=="0627540"
replace newleaid="0600062" if leaid=="0627960"
replace newleaid="0600062" if leaid=="0627990"
replace newleaid="0643370" if leaid=="0628020"
replace newleaid="0600045" if leaid=="0628890"
replace newleaid="0600045" if leaid=="0628920"
replace newleaid="0600033" if leaid=="0628980"
replace newleaid="0600031" if leaid=="0629310"
replace newleaid="0606390" if leaid=="0629730"
replace newleaid="0600048" if leaid=="0629970"
replace newleaid="0600048" if leaid=="0630000"
replace newleaid="0600069" if leaid=="0630060"
replace newleaid="0600048" if leaid=="0630300"
replace newleaid="0600020" if leaid=="0631080"
replace newleaid="0600064" if leaid=="0631410"
replace newleaid="0600064" if leaid=="0631440"
replace newleaid="0600032" if leaid=="0632100"
replace newleaid="0621450" if leaid=="0632490"
replace newleaid="0600061" if leaid=="0633000"
replace newleaid="0600040" if leaid=="0633030"
replace newleaid="0600040" if leaid=="0633060"
replace newleaid="0600013" if leaid=="0633300"
replace newleaid="0634425" if leaid=="0634420"
replace newleaid="0691134" if leaid=="0634530"
replace newleaid="0600012" if leaid=="0634610"
replace newleaid="0691136" if leaid=="0634660"
replace newleaid="0605580" if leaid=="0635640"
replace newleaid="0614040" if leaid=="0636060"
replace newleaid="0614950" if leaid=="0636540"
replace newleaid="0636805" if leaid=="0636720"
replace newleaid="0636805" if leaid=="0636770"
replace newleaid="0600001" if leaid=="0637080"
replace newleaid="0600032" if leaid=="0637410"
replace newleaid="0600116" if leaid=="0637740"
replace newleaid="0600021" if leaid=="0638490"
replace newleaid="0600028" if leaid=="0638940"
replace newleaid="0600047" if leaid=="0639480"
replace newleaid="0600047" if leaid=="0639510"
replace newleaid="0691134" if leaid=="0639540"
replace newleaid="0691134" if leaid=="0639570"
replace newleaid="0600158" if leaid=="0640050"
replace newleaid="0600158" if leaid=="0640080"
replace newleaid="0600016" if leaid=="0640440"
replace newleaid="0691135" if leaid=="0640620"
replace newleaid="0600069" if leaid=="0640770"
replace newleaid="0600042" if leaid=="0641370"
replace newleaid="0600012" if leaid=="0641490"
replace newleaid="0604080" if leaid=="0641670"
replace newleaid="0600063" if leaid=="0641700"
replace newleaid="0626280" if leaid=="0642390"
replace newleaid="0600034" if leaid=="0642870"
replace newleaid="0630660" if leaid=="0643350"
replace newleaid="0622110" if leaid=="0691053"
replace newleaid="0600085" if leaid=="0691054"
replace newleaid="0600088" if leaid=="0691056"
replace newleaid="0600150" if leaid=="0691057"
replace newleaid="0600090" if leaid=="0691060"
replace newleaid="0600092" if leaid=="0691061"
replace newleaid="0600093" if leaid=="0691064"
replace newleaid="0600096" if leaid=="0691065"
replace newleaid="0600097" if leaid=="0691066"
replace newleaid="0600098" if leaid=="0691067"
replace newleaid="0600099" if leaid=="0691068"
replace newleaid="0600100" if leaid=="0691069"
replace newleaid="0600101" if leaid=="0691070"
replace newleaid="0600102" if leaid=="0691071"
replace newleaid="0600073" if leaid=="0691072"
replace newleaid="0600074" if leaid=="0691073"
replace newleaid="0600075" if leaid=="0691074"
replace newleaid="0600077" if leaid=="0691075"
replace newleaid="0600078" if leaid=="0691076"
replace newleaid="0600079" if leaid=="0691077"
replace newleaid="0600103" if leaid=="0691079"
replace newleaid="0600105" if leaid=="0691081"
replace newleaid="0600106" if leaid=="0691082"
replace newleaid="0600108" if leaid=="0691084"
replace newleaid="0622710" if leaid=="0691085"
replace newleaid="0600110" if leaid=="0691086"
replace newleaid="0600111" if leaid=="0691087"
replace newleaid="0600112" if leaid=="0691088"
replace newleaid="0600117" if leaid=="0691089"
replace newleaid="0600118" if leaid=="0691091"
replace newleaid="0600081" if leaid=="0691094"
replace newleaid="0600120" if leaid=="0691095"
replace newleaid="0600080" if leaid=="0691096"
replace newleaid="0600121" if leaid=="0691097"
replace newleaid="0600122" if leaid=="0691098"
replace newleaid="0600123" if leaid=="0691099"
replace newleaid="0600124" if leaid=="0691100"
replace newleaid="0600082" if leaid=="0691101"
replace newleaid="0600083" if leaid=="0691103"
replace newleaid="0600094" if leaid=="0691104"
replace newleaid="0600095" if leaid=="0691105"
replace newleaid="0600130" if leaid=="0691107"
replace newleaid="0600131" if leaid=="0691108"
replace newleaid="0600132" if leaid=="0691109"
replace newleaid="0600138" if leaid=="0691110"
replace newleaid="0600139" if leaid=="0691113"
replace newleaid="0600140" if leaid=="0691114"
replace newleaid="0600137" if leaid=="0691116"
replace newleaid="0600143" if leaid=="0691117"
replace newleaid="0600125" if leaid=="0691118"
replace newleaid="0600157" if leaid=="0691121"
replace newleaid="0600128" if leaid=="0691123"
replace newleaid="0600129" if leaid=="0691124"
replace newleaid="0600145" if leaid=="0691126"
replace newleaid="0600146" if leaid=="0691127"
replace newleaid="0600147" if leaid=="0691128"
replace newleaid="0600148" if leaid=="0691129"
replace newleaid="0600149" if leaid=="0691130"
replace newleaid="0600141" if leaid=="0691131"
replace newleaid="0600142" if leaid=="0691132"
replace newleaid="0600133" if leaid=="0691133"

drop if newleaid==""
duplicates report newleaid year

tempfile newleaids
save `newleaids'

*Make a crosswalk
 keep leaid year newleaid
 sort leaid year
 save Source\newleaids_xwalk.dta, replace

*Prepare data from the CCD agency files for analysis with school bonds data
 use if stfips==6 & year>=1996 using "Source\agpanel.dta"
*NOTE:  This is another spot where I think he is using the definition of year as fall year of the school calendar 
 replace year=year-1
 keep leaid enroll enrollg nskls nteach year
 sort leaid year
 *Merge to new leaids
  merge leaid year using Source\newleaids_xwalk, unique
 tab _merge
 keep if _merge==3
 drop _merge
 sort leaid year
 tempfile ccdagdat
 save `ccdagdat'

use `newleaids'
sort leaid year
merge leaid year using `ccdagdat', unique
drop _merge

#delimit  ;

sort newleaid year;
foreach v of varlist v33 totalrev tfedrev tstrev c11 tlocrev t06 t09 t15 t40 t99  t02 d23 d11
                     a07 a08 a09 a11 a13 a20 a15 u22 u97 totalexp tcurinst tcurssvc v40 v45
                     tcuroth v60 tnonelse tcapout f12 k09 k10 k11 g15 tcurelsc l12 m12 q11
                     i86 z32 z33 v13 v15 v17 z34 _19h _21f _31f _41f _61v _66v c01 c12 c13
                     c35 w01 w31 w61 enroll enrollg nskls nteach {;
   replace `v'=. if `v'<0;
  };

collapse (sum) v33 totalrev tfedrev tstrev c11 tlocrev t06 t09 t15 t40 t99  t02 d23 d11
a07 a08 a09 a11 a13 a20 a15 u22 u97 totalexp tcurinst tcurssvc v40 v45 tcuroth v60 tnonelse tcapout
f12 k09 k10 k11 g15 tcurelsc l12 m12 q11 i86 z32 z33 v13 v15 v17 z34 _19h _21f _31f _41f _61v _66v
c01 c12 c13 c35 w01 w31 w61
enroll enrollg nskls nteach
(median) csa cbsa stfips cofips, by(newleaid year);
#delimit cr


*note: i am losing many variables, only keep newleaid year + those which make sense to be added
sort newleaid year


label var newleaid "Consistent LEAID over time"

save Data\financepanel.dta, replace

*-----------------------------------------------------------------------------------*
*                       Clearning BOND data and merging on finance data             *      
*                                                                                   *
*-----------------------------------------------------------------------------------*

use "Source\referenda_newleaids_capital.dta", clear

** Note: He is using yearref as a Fall academic year - we will need to change to spring year **

 rename year yearref
 *note: we are replacing calendar year with schoolyear
  clonevar year=yearref
  replace yearref = yearref - 1 if month<5
  label var yearref "schoolyear starting yr of elec."
 
*change req threshold and type in one election, look at :\\coauthor\referenda_graphing_RDD.do for details
  replace req="M" if newleaid=="0615180" & yearref==1992
  replace election_type="Other" if newleaid=="0615180" & yearref==1992

 *Select sample - the 1986 data is sparse and unreliable
   keep if yearref>=1987

  *Keep only GO Bonds and parcel taxes
   keep if election_type== "GO Bond"  | election_type=="Parcel Tax"
   gen gobond=(election_type=="GO Bond")
   gen parcel_cap=(capitaloutlays=="Y")*(gobond==0)

  *Identify measures where percent, passfail, and req dont all match
*Note: mv stands for margin of victory
   gen mv=percent-req_num
*Note: I think stata codes missing values . as being huge numbers
   gen badobs=(mv>=0 & mv<. & passfail=="F")
   replace badobs=1 if mv<0 & passfail=="P"
   replace badobs=1 if mv==.
   count if badobs==1 & percent<.
   assert r(N)==1
   count if badobs==1 & percent==.
   replace percent=. if badobs==1
   label var mv "voteshare-based MARGIN of VICTORY"

  *If there are multiple measures that meet these criteria, keep winning over losing measures
  *and then keep only the highest vote share.  Break ties by (1) GO over parcel (2) lowest requirement.
   gen win=(passfail=="P") if badobs~=1
   gen fakewin=(percent>=66.7) if percent<. & req_num<=56
   replace fakewin=(percent>=55) if percent<. & req_num>=66 & req_num<.
   assert req_num<=56 | req_num>=66
   label var fakewin "Indicator for pass at counterfactual threshold"
*Note: gsort allows you to sort in a specified assending or decending order across vars.
   gsort newleaid yearref badobs -gobond -win -percent req_num 
*Note: this give you a count of the unique by the specified vars
   by newleaid yearref: gen numelec=_N
   by newleaid yearref: gen numbad=sum(badobs)
   by newleaid yearref: replace badobs=(numbad[_N]>0)
*Note: THIS IS A BIG DECISION WHERE HE KEEPS ONLY ONE BOND PASSED PER YEAR IN A SCHOOL WE MIGHT ULTIMATELY DISAGREE WITH - CHECK
   by newleaid yearref: keep if _n==1
   label var numelec "# of GO/parcel elecs in same year"

 *Form counts of measures per district since 1983
  sort newleaid yearref
  by newleaid: gen measnum=_n
  by newleaid: gen nummeas=_N
  label var measnum "Measure number (chronological) in district"
  label var nummeas "Number of measures in district in sample"
  tab measnum
  tab nummeas if measnum==nummeas
  su measnum, meanonly
  local maxnmeas=r(max)

 sort newleaid yearref
*NOTE: this checks whether newleaid and yearref uniquely identify observations in the data
 isid newleaid yearref
 compress
 save Data\referenda, replace

*NOTE: here he creates a temporary file to keep manipulating the data set
tempfile referenda referenda_wide
 sort newleaid yearref
 gen refid=_n
 *We only need a few variables
  *keep newleaid yearref percent win fakewin req_num numelec gobond parcel_cap    MK
   *refid measnum nummeas proposed_bond      MK
 *NOTE I add the drop of req which needs to happen for things to work - it is the string for req_num which we rename
 drop req
 rename req_num req
 *NOTE: I add the compress
 compress
 save `referenda'

*Make counts of referenda to date and victories to date
use `referenda'
gen meastodate=measnum
by newleaid (yearref): gen winstodate=sum(win)
keep newleaid yearref meastodate winstodate 
*NOTE: if vars are different dimentions this fills in missing parts of their rectangular matrix so all interactions in the data set
fillin newleaid yearref
sort newleaid yearref
by newleaid: replace meastodate=0 if _n==1 & _fillin
by newleaid: replace winstodate=0 if _n==1 & _fillin
by newleaid: replace meastodate=meastodate[_n-1] if meastodate==.
by newleaid: replace winstodate=winstodate[_n-1] if winstodate==.
drop _fillin
sort newleaid yearref

*NOTE: THIS IS A VERY BIG DECISION HERE ABOUT HOW TO MATCH SCHOOL YEARS TO FISCAL YEARS * 
rename yearref year      
tempfile refs2date
save `refs2date'

*Merge to the finance data, and make leads and lags of the referenda variables
 use "Data\financepanel.dta", clear
 *Drop observations with <50 enrollment
  drop if v33<50
 sort newleaid year
 merge newleaid year using `refs2date', unique
 tab _merge
 sort newleaid year
 by newleaid: replace meastodate=0 if _merge==1 & _n==1
 by newleaid: replace winstodate=0 if _merge==1 & _n==1
 by newleaid: replace meastodate=meastodate[_n-1] if _merge==1 & _n>1
 by newleaid: replace winstodate=winstodate[_n-1] if _merge==1 & _n>1
 drop _merge
 sort newleaid year
 *Fill in measures/wins for observations pre CCD
  fillin newleaid year
  sort newleaid year
  by newleaid: replace meastodate=0 if _n==1 & meastodate==.
  by newleaid: replace winstodate=0 if _n==1 & winstodate==.
  by newleaid: replace meastodate=meastodate[_n-1] if _n>1 & meastodate==.
  by newleaid: replace winstodate=winstodate[_n-1] if _n>1 & winstodate==.
 tempfile financedat
 save `financedat'

*Form average enrollment per year, to use as denominator for bond size measure
 use if v33<. using `financedat'
 collapse (mean) v33, by(newleaid)
 rename v33 avgenrollment
 label var avgenrollment "Avg. district enrollment across all yrs"
 sort newleaid

*Another set of outcome variables is the presence of an initiative on the ballot
 merge newleaid using `referenda', uniqmaster
 tab _merge
 drop if _merge==1
 drop _merge
 destring proposed_bond, gen(bondamt_thisyr)
 replace bondamt_thisyr=0 if win~=1 | bondamt_thisyr==.
 *Convert to real 2000$, using "West" CPI-U (CUUR0400SA0,CUUS0400SA0)
  replace bondamt_thisyr=bondamt_thisyr*174.8/102.3 if yearref==1984
  replace bondamt_thisyr=bondamt_thisyr*174.8/106.8 if yearref==1985
  replace bondamt_thisyr=bondamt_thisyr*174.8/109.8 if yearref==1986
  replace bondamt_thisyr=bondamt_thisyr*174.8/113.2 if yearref==1987
  replace bondamt_thisyr=bondamt_thisyr*174.8/117.7 if yearref==1988
  replace bondamt_thisyr=bondamt_thisyr*174.8/123.3 if yearref==1989
  replace bondamt_thisyr=bondamt_thisyr*174.8/129.4 if yearref==1990
  replace bondamt_thisyr=bondamt_thisyr*174.8/136.2 if yearref==1991
  replace bondamt_thisyr=bondamt_thisyr*174.8/141.0 if yearref==1992
  replace bondamt_thisyr=bondamt_thisyr*174.8/145.5 if yearref==1993
  replace bondamt_thisyr=bondamt_thisyr*174.8/148.7 if yearref==1994
  replace bondamt_thisyr=bondamt_thisyr*174.8/152.9 if yearref==1995
  replace bondamt_thisyr=bondamt_thisyr*174.8/156.6 if yearref==1996
  replace bondamt_thisyr=bondamt_thisyr*174.8/160.6 if yearref==1997
  replace bondamt_thisyr=bondamt_thisyr*174.8/163.6 if yearref==1998
  replace bondamt_thisyr=bondamt_thisyr*174.8/167.8 if yearref==1999
  replace bondamt_thisyr=bondamt_thisyr*174.8/173.1 if yearref==2000
  replace bondamt_thisyr=bondamt_thisyr*174.8/180.2 if yearref==2001
  replace bondamt_thisyr=bondamt_thisyr*174.8/184.0 if yearref==2002
  replace bondamt_thisyr=bondamt_thisyr*174.8/188.2 if yearref==2003
  replace bondamt_thisyr=bondamt_thisyr*174.8/191.9 if yearref==2004
  replace bondamt_thisyr=bondamt_thisyr*174.8/197.1 if yearref==2005
  replace bondamt_thisyr=bondamt_thisyr*174.8/204.5 if yearref==2006
  replace bondamt_thisyr=bondamt_thisyr*174.8/210.890 if yearref==2007
 
 *NOTE; here he used average enrollment over the course of the entire panel of data as the denominator rather then yearly average
 gen bondamt_thisyr_pp=bondamt_thisyr/avgenrollment
 *MK This is where he drops the vars from the orignal bond file
 keep newleaid yearref gobond parcel_cap win bondamt_thisyr_pp    
 rename gobond isgobond
 rename parcel_cap isparcelcap
 gen byte ismeasure=1
 rename win iswin
 label var isgobond "Is a GO Bond on ballot this yr?"
 label var ismeasure "Is any Bond/parcel tax on ballot this yr?"
 label var isparcelcap "Is a parcel tax w/capital on ballot this yr?"
 label var iswin "Did measure on ballot win?"
 label var bondamt_thisyr_pp "Size of bond passed this year/avg. enroll"
 fillin newleaid yearref
 foreach v of varlist isgobond isparcelcap ismeasure iswin bondamt_thisyr_pp {
 	replace `v'=0 if _fillin==1
 }
 drop _fillin
 
 *NOTE: this is my addition to retain more info about the year vars instead of loosing it.
 *rename year year_vote                  
 
 rename yearref year
 sort newleaid year
 merge newleaid year using `financedat', unique
 tab _merge
 foreach v of varlist isgobond isparcelcap ismeasure iswin bondamt_thisyr_pp {
 	replace `v'=0 if _merge==2
 }
 drop _merge
 sort newleaid year 
 * delete req from the sore b/c it appears to me to be in the using database not master
 
 *NOTE: I add this to deal with restricted memory problems
compress
*drop source percent_string passfail altname* _merge_new*
 
 *NOTE: so I think this is something akin to merging but not sure about the nuanced differences
 * Here he basically cleans up the core data which he used fillin to make rectangualar and at the end he comes back
 * to add all the other vars for th relevant observations - cool.
 joinby newleaid using `referenda', unmatched(none)
 
 *------------------------------------------------------------------------------------*
 *            I am breaking up Jesse's code from where he is merging to now when      *
 *            he is creating variables fore analyssi                                  *
 * -----------------------------------------------------------------------------------*
 gen dyear=year-yearref

 *keep if dyear>=-2 & dyear<=6
  keep if dyear>=-19 & dyear<=19

*Clean up some variables
 gen dltdebt=_41f-_19h
 gen dstdebt=_66v-_61v
 *note: this generates a column of ceros
 *note: checking back, since leafinane_panel_ca those variables were only zeros
  rename _21f ltdebtiss
  rename _31f ltdebtret
  rename _41f ltdebt
  rename i86 interest
  
  *NOTE: notice we do have this local property tax variable that will allow us to monotize parcel referenda
  rename t06 locproptax
  rename v40 curexp_oper
  rename v45 curexp_xport
  rename v60 curexp_entop
  rename f12 cap_constr
  rename g15 cap_land
  gen cap_equip=k09+k10+k11
  rename z33 instrsal
  rename _66v stdebt
  rename w01 assets_sink
  rename w31 assets_bond
  rename w61 assets_other
  rename u22 interestearned
*Current non-instructional expenditure:
*(Total current elem/sec - instructional) + total non-elem/sec
* Excludes the following that are otherwise included in total expenditures:
* L12: Payments to state governments
* M12: Payments to local governments
* Q11: Payments to other school systems
* I86: Interest on debt
 gen tcurnoninst=tcurelsc-tcurinst+tnonelse
 gen igpmts = l12 + m12 + q11
 gen totassets=assets_sink+assets_bond+assets_other

gen othlocrev = tlocrev - locproptax - interestearned
gen othcurelsc = tcurelsc - tcurinst - curexp_oper
gen netrev = totalrev - totalexp
gen totdebt = ltdebt + stdebt
gen netassets = totassets - totdebt

destring proposed_bond, gen(bondamt)



set more off
#delimit ;
foreach v of varlist tcapout tcurelsc totalexp tcurinst dltdebt ltdebtiss ltdebtret ltdebt interest
             locproptax curexp_oper curexp_xport curexp_entop cap_constr cap_land cap_equip instrsal
             tstrev tfedrev c11 c12 c01 c13 c35 tcurnoninst
             stdebt assets_sink assets_bond assets_other dstdebt tlocrev interestearned
             totalrev tnonelse igpmts totassets othlocrev othcurelsc netrev totdebt
             bondamt
             {;
 *Convert to real 2000$, using "West" CPI-U (CUUR0400SA0,CUUS0400SA0)
  replace `v'=`v'*174.8/102.3 if year==1984;
  replace `v'=`v'*174.8/106.8 if year==1985;
  replace `v'=`v'*174.8/109.8 if year==1986;
  replace `v'=`v'*174.8/113.2 if year==1987;
  replace `v'=`v'*174.8/117.7 if year==1988;
  replace `v'=`v'*174.8/123.3 if year==1989;
  replace `v'=`v'*174.8/129.4 if year==1990;
  replace `v'=`v'*174.8/136.2 if year==1991;
  replace `v'=`v'*174.8/141.0 if year==1992;
  replace `v'=`v'*174.8/145.5 if year==1993;
  replace `v'=`v'*174.8/148.7 if year==1994;
  replace `v'=`v'*174.8/152.9 if year==1995;
  replace `v'=`v'*174.8/156.6 if year==1996;
  replace `v'=`v'*174.8/160.6 if year==1997;
  replace `v'=`v'*174.8/163.6 if year==1998;
  replace `v'=`v'*174.8/167.8 if year==1999;
  replace `v'=`v'*174.8/173.1 if year==2000;
  replace `v'=`v'*174.8/180.2 if year==2001;
  replace `v'=`v'*174.8/184.0 if year==2002;
  replace `v'=`v'*174.8/188.2 if year==2003;
  replace `v'=`v'*174.8/191.9 if year==2004;
  replace `v'=`v'*174.8/197.1 if year==2005;
  replace `v'=`v'*174.8/204.5 if year==2006;
  replace `v'=`v'*174.8/210.890 if year==2007;
 	gen `v'_pp=`v'/v33;
	 *NOTE: Here he basically recodes outliers - interesting approach - seems a little cavalier;
  *Truncate top & bottom 1% of each;
   qui su `v'_pp, d;
   replace `v'_pp=r(p1) if `v'_pp<r(p1);
   replace `v'_pp=r(p99) if `v'_pp>r(p99) & `v'_pp<.;
 };
 #delimit cr

  
*replace totalexp_pp = totalexp_pp/1000
*replace tcurelsc_pp = tcurelsc_pp/1000
*replace tcapout_pp= tcapout_pp/1000


compress
save "Data\recursivepanel.dta", replace

*---*THIS IS ME CLEANING UP SOME VARAIBLES THAT WE COULD POSSIBLY UES
rename v33 fall_mem
label var fall_mem "fall membership"

rename locproptax lprop_tax
label var lprop_tax "local property taxes"

rename t09 lgsale_tax
label var lgsale_tax "local general sales taxes"

rename t15 lutil_tax
label var lutil_tax "local utilities taxes"

rename t40 lincome_tax
label var lincome_tax "local income and coperate net income tax"

rename t99 lother_tax
label var lother_tax "local tax revenue from other sources"

rename curexp_oper oper_pltmaint
label var oper_pltmaint "operations and maintenance of plant"

rename cap_constr capout_constr
label var capout_constr "capital outlay construction"

rename k09 capout_inst_eq
label var capout_inst_eq "capital outlay instructional equipment"

rename k10 capout_noninst_eq
label var capout_noninst_eq "capital outlay non-instructional equipment"

rename k11 capout_eq_unspec
label var capout_eq_unspec "capital outlay equipment unspecified"

rename cap_land capout_land_exist_struc
label var capout_land_exist_struc "capital outlay for land and pre-existing structures"

* this is just a label ok!
label var tcurelsc "total current expend for elem and secondary"

rename z32 totlsal_elmsec
label var totlsal_elmsec "total current expenditures for elementary and secondary ed"

rename instrsal sal_instr_only
label var sal_instr_only "salaries for instruction only"

rename v13 sal_instr_staffsup
label var sal_instr_staffsup "salaries for instructional staff support"

rename v15 sal_genadmin
label var sal_genadmin "salaries general administration"

rename v17 sal_schadmin
label var sal_schadmin "salaries school administration"

save "Data\recursivepanel.dta", replace

*-----------------------------------------------------------------------------------*
*                           Merging dist_outcomes to core bond data set             *
*                                   This is all my coding now                       *
*-----------------------------------------------------------------------------------*

use "Data\recursivepanel.dta", clear
destring district_code, replace

*NOTE: here I am switching the value of year in the recursive data set!!!!!
clonevar yearfall=year 
label var yearfall "fall year of school year"
gen yearspring= year + 1
drop year
rename yearspring year
label var year "spring year of school year"

** destrining district_code
destring district_code, replace
sort district_code year
merge m:1 district_code year using "Source\dist_outcome.dta" 
tab year _merge
keep if _merge==3
compress

** we have matched outcomes from 1993 to 2006!!!!!
* note we are not matching about 20-30 observations of 1554 per year in the data.  Not sure why

*dropping unused vars
drop c11 t02-u97 l12 m12 q11 z34 _19h _61v  c01-c35 _fillin source altname* _merge* curexp_xport curexp_xport curexp_entop interest ltdebtiss-assets_other 

save "Data\bond_outcomes.dta", replace





*-----------------------------------------------------------------------------------*
*                         Preparing for Imputation                                  *
*                                   This is all my coding now                       *
*-----------------------------------------------------------------------------------*
use "Data\bond_outcomes.dta", clear

capture drop _merge
sort district_code year
merge m:1 district_code year using Source\ca_allyrs_reduced
tab year _merge 
*drop _merge


 foreach var of varlist capital_balquest capitaloutlays classsize curriculum athletic technology safety statematch newschool communitycollege {
 gen `var'_num=(`var'=="Y")
 drop `var'
 rename `var'_num `var'
 }
 gen madeayp_num = (madeayp=="Yes")
 drop madeayp
 rename madeayp_num madeayp
destring ada, replace
save "Data\complete.dta", replace

                                                       ** preparing data for Amelia **
drop if _merge==2
drop if year<1998
drop if newleaid==""
drop if percent==.
*NOTE - data set is unique by newleaid year refid
isid newleaid refid year
order newleaid year refid win percent dyear classsize averageclasssize 

 gen percent2=percent^2
 gen percent3=percent^3
 *Make leads and lags
  foreach v in percent percent2 percent3 win req gobond {
   forvalues dy=1/19 {
    gen `v'_m`dy'=`v'*(dyear==-`dy')
    }
   forvalues dy=0/19 {
    gen `v'_`dy'=`v'*(dyear==`dy')
   }
   *drop `v'
  }



*NOTE: IT APPEARS HE RESTRICTS THE DATA TO ONLY GOV BOND WHICH TAKES US FROM 1554 TO 1230
*Use only years -2 to +6
 keep if dyear>=-2 & dyear<=6

tab year, gen(yrdums)
tab dyear, gen(dydums)

#delimit; 
keep  newleaid year refid gobond tcapout_pp totalexp_pp tcurinst_pp expendituresperada englishlearners 
freeorreducedmeals minority ethnicdiversityindex averageclasssize numberofteachers numberoffulltimeequivalents fullycredentialled 
pupilsperadministrator averageteachersalary  yeardropoutrate twoyrsexperience ba60salary proficientlangarts proficientmath 
std_ms_score_31 std_ms_score_32 std_ms_score_71 std_ms_score_72 classsize meastodate winstodate month percent req ada mv win dyear;
# delimit cr


win_m1 win_1 win_2 win_3 win_4 win_5 win_6 dydums1 dydums2 dydums3 dydums4  dydums5 dydums6 dydums7 dydums8 dydums9 req_1 req_2 req_3 req_4 req_5
req_6 percent_1 percent_2 percent_3 percent_4 percent_5 percent_6 percent2_1 percent2_2 percent2_3 percent2_4 percent2_5 percent2_6 percent3_1 
percent3_2 percent3_3 percent3_4 percent3_5 percent3_6 ;



save "Data\amelia.dta", replace

*----------------------------------------------------------------------------------------*
*
*                                    ANALYSIS                                            *
*
*----------------------------------------------------------------------------------------*

*------------------------------------------------------------------------------------------*
*                                  TABLE 4A  - ITT                                         *
*------------------------------------------------------------------------------------------*
***NOTE: I had to install outreg often***
/*
*NOTE: IT APPEARS HE RESTRICTS THE DATA TO ONLY GOV BOND WHICH TAKES US FROM 1554 TO 1230

* i CHANGE HIS CODE TO KEEP EVERYONE!!!
use Data\complete.dta, clear


*Use only years -2 to +6
 keep if dyear>=-2 & dyear<=6
 *Drop other variables
  foreach v in percent percent2 percent3 percent4 win req {
    forvalues y=7/19 {
      drop `v'_`y'
      }
    forvalues y=3/19 {
      drop `v'_m`y'
      }
    }

*Clean up the measure outcomes
*NOTE: I really dont understand what he is doing here with the variables
 gen bondonballot=isgobond*(dyear~=0)
 gen winbond=bondonballot*iswin
 replace winbond=0 if bondonballot==0 & iswin==.

tab year, gen(yrdums)
tab dyear, gen(dydums)

*Exclude year 0 from all analyses
foreach v of varlist win_0 percent_0 percent2_0 percent3_0 req_0 {
	rename `v' tmp_`v'
	}

*Add together state and federal revenue
 gen tstfedrev_pp=tstrev_pp+tfedrev_pp

*Start to run the regressions
*Design:  MEASURE FEs, cubic controls, GO bonds only
*NOTE: so teh measure FE make it so we are only looking at the variation within time panel for that district relative to that one election!!!!

 *Start with unrestricted effects, leaving out future years
 set more off
  foreach v of varlist totalexp_pp tcapout_pp tcurinst_pp tstfedrev_pp {
  	 if "`v'"=="totalexp_pp" {
  	 	local apprep "replace"
  	 }
  	 else {
  	 	local apprep "append"
  	 }
     areg `v' yrdums* dydums* win_? req_? percent_? percent2_? percent3_? if gobond==1, absorb(refid) cluster(newleaid)
     *Tests:  All past
      testparm win_?
      local p_past=r(p)
     outreg using "Output\fiscaltab_rf.txt", bdec(8) tdec(8) adec(8) se noaster noparen nolabel addstat("Ppast", `p_past') `apprep'
  }
*/
*------------------------------------------------------------------------------------------------------*
*                                  Our exploritory analysis of achievement outcomes
*                                           	THESE ARE FROM THE PAPER
*-------------------------------------------------------------------------------------------------------*
***NOTE: I had to install outreg often***
use Data\complete.dta , clear

*Make squared, cubed, etc. vars
*NOTE: This code creates lagged vars for all of the vars indicated for all cross sections of time in each row
 gen percent2=percent^2
 gen percent3=percent^3
 *Make leads and lags
  foreach v in percent percent2 percent3 win req gobond {
   forvalues dy=1/19 {
    gen `v'_m`dy'=`v'*(dyear==-`dy')
    }
   forvalues dy=0/19 {
    gen `v'_`dy'=`v'*(dyear==`dy')
   }
   *drop `v'
  }



*NOTE: IT APPEARS HE RESTRICTS THE DATA TO ONLY GOV BOND WHICH TAKES US FROM 1554 TO 1230
*Use only years -2 to +6
 keep if dyear>=-2 & dyear<=6

 /*
 *Drop other variables
  foreach v in percent percent2 percent3 win req gobond {
    forvalues y=7/19 {
      drop `v'_`y'
      }
    forvalues y=3/19 {
      drop `v'_m`y'
      }
    }
*/
tab year, gen(yrdums)
tab dyear, gen(dydums)

*Exclude year 0 from all analyses
foreach v of varlist gobond_0 win_0 percent_0 percent2_0 percent3_0 req_0 {
	rename `v' tmp_`v'
	}
	
*** focusing on classsize***

* all measures
estimates clear
eststo: areg averageclasssize yrdums* dydums* win_m1 win_?  req_? percent_? percent2_? percent3_? if classsize==1, absorb(refid) 
testparm win_?
estout using Output\Rothstein_classsize_anymeasure.rft, cells(b(star fmt(2)) se(fmt(2))) legend label title(Regression Models) mlabels("31" "32" "71" "72") varlabels(_cons INTERCEPT) stats(N r2 df_r, fmt(0 3 0) label (N R2 DF)) style(fixed)

* bonds
estimates clear
eststo: areg averageclasssize yrdums* dydums* win_m1 win_?  req_? percent_? percent2_? percent3_? if classsize==1 & gobond==1, absorb(refid) 
testparm win_?
estout using Output\Rothstein_classsize_gobond.rtf, cells(b(star fmt(2)) se(fmt(2))) legend label title(Regression Models) mlabels("31" "32" "71" "72") varlabels(_cons INTERCEPT) stats(N r2 df_r, fmt(0 3 0) label (N R2 DF)) style(fixed)estimates clear

* parcel 
estimates clear
eststo: areg averageclasssize yrdums* dydums* win_m1 win_?  req_? percent_? percent2_? percent3_? if classsize==1 & gobond~=1, absorb(refid) 
testparm win_?
estout using Output\Rothstein_classsize_parcel.rtf, cells(b(star fmt(2)) se(fmt(2))) legend label title(Regression Models) mlabels("31" "32" "71" "72") varlabels(_cons INTERCEPT) stats(N r2 df_r, fmt(0 3 0) label (N R2 DF)) style(fixed)

*Here I run ITT on all of our Ed Data outcomes
estimates clear
foreach var of varlist std_ms_score_31 std_ms_score_32 std_ms_score_71  std_ms_score_72{
eststo: areg `var' yrdums* dydums* win_? req_? percent_? percent2_? percent3_? if classsize==1 & gobond==1, absorb(refid)
}
estout using Output\Rothstein_testscores_gobond.rtf, cells(b(star fmt(2)) se(fmt(2))) legend label title(Regression Models) mlabels("31" "32" "71" "72") varlabels(_cons INTERCEPT) stats(N r2 df_r, fmt(0 3 0) label (N R2 DF)) style(fixed)

eststo: areg `var' yrdums* dydums* win_? req_? percent_? percent2_? percent3_? if classsize==1 & gobond~=1, absorb(refid)
eststo: areg `var' yrdums* dydums* win_? req_? percent_? percent2_? percent3_? if classsize==1,  absorb(refid)
}
estout using Output\Rothstein_testscores.rtf, cells(b(star fmt(3)) se(par fmt(2))) legend label title(Regression Models) mlabels("31" "32" "71" "72") varlabels(_cons INTERCEPT) stats(N r2 df_r, fmt(0 3 0) label (N R2 DF)) style(fixed)
						 
				 
foreach var of varlist numberofteachers fullycredentialled averageteachersalary twoyrsexperience ba60salary{
eststo: areg `var' yrdums* dydums* win_? req_? percent_? percent2_? percent3_? if classsize==1 & gobond==1, absorb(refid) 
eststo: areg `var' yrdums* dydums* win_? req_? percent_? percent2_? percent3_? if classsize==1 & gobond~=1, absorb(refid)
eststo: areg `var' yrdums* dydums* win_? req_? percent_? percent2_? percent3_? if classsize==1  ,absorb(refid) 
}							 
estout using Output\Rothstein_teacher_outcomes.rtf, cells(b(star fmt(3)) se(par fmt(2))) legend label title(Regression Models) mlabels("31" "32" "71" "72") varlabels(_cons INTERCEPT) stats(N r2 df_r, fmt(0 3 0) label (N R2 DF)) style(fixed)

drop _merge
sort newleaid year refid 
order newleaid year refid
drop ada                        
save "Data/baseimpute.dta" , replace
							 
							 
******************************************** trying to copy graphs

#delimit;
clear matrix;
use Data\complete, clear;

*gen mv=percent-req;
gen mvbin=(ceil(abs(percent-req))-0.5)*sign(percent-req);
replace mvbin=0.5 if percent==req & percent<.;
gen mvbin2=(2*ceil(abs(percent-req)/2) - 1)*sign(percent-req);
replace mvbin2=1 if percent==req & percent<.;

 *gen bondonballot=isgobond;
 *gen winbond=bondonballot*iswin;
 *replace winbond=0 if bondonballot==0 & iswin==.;

keep if fall_mem>50;
tempfile basedat;
save `basedat';

*Regress outcomes on year dummies and bin dummies, separately by relative year;
 tab year, gen(yrdums);
 tab mvbin2, gen(bin2dums);
 forvalues b=1/34 {;
	 su mvbin2 if bin2dums`b'==1, meanonly;
 	 local mv`b'=r(mean);
 	};
 *Drop bin for -1;
  gen binm1=(mvbin2==-1);
  assert bin2dums20==binm1;
  drop bin2dums20;

 foreach v in averageclasssize {;
 	 matrix ests_`v'=J(34, 9, 0);
   forvalues col=1/9 {;
 	 	 reg `v' yrdums* bin2dums* if dyear==`col'-1;
 	 	 forvalues b=1/34 {;
 	 	 	 if `b'~=20 {;
 	 	 	 	 local est=_b[bin2dums`b'];
 	 	 	 	 matrix ests_`v'[`b', `col']=`est';
 	 	 	 };
 	 	 };
 	 };
 	 matrix colnames ests_`v'=rym2 rym1 ry0 ry1 ry2 ry3 ry4 ry5 ry6;
 	 matrix coleq ests_`v'=`v';
 };

drop _all;
set obs 34;
svmat ests_averageclasssize, names(eqcol);
*svmat ests_tcapout, names(eqcol);
*svmat ests_tcurinst, names(eqcol);
*svmat ests_ltdebt, names(eqcol);
gen mvbin=0;
forvalues i=1/34 {;
	replace mvbin=`mv`i'' in `i';
};


*Total expenditures;
scatter averageclasssizerym1 averageclasssizery3 mvbin if mvbin>=-10 & mvbin<=10, c(l l) clpattern(dash solid)
          xline(0)
          legend(label(1 "Year before election") label(2 "Three years after election"))
          xtitle("Vote share relative to threshold (2 pp bins)")
          ytitle("Mean total expenditures PP") ylabel(-500 (500) 1500)
        title("Total expenditures per pupil, by margin of victory"
              "one year before and three years after election")
        note("Notes:  Graphs show average capital outlays per pupil in each bin in the listed year relative to the "
             "election.  Averages are conditional on year fixed effects, and the -1 bin is normalized to zero."
             "Measures that passed by between 0.001% and 2% are assigned to the 1 bin, those that passed by"
             "between 2.001% and 4% are assigned to the 3 bin, and so on; those that failed by similar margins "
             "are assigned to the -1 and -3 bins, respectively.  Districts with fewer than 50 students are excluded.")
          saving(fig2a, replace) ;
		  
		  



 















